<?php

declare(strict_types=1);

namespace Doctrine\SqlFormatter\Tests;

use Doctrine\SqlFormatter\Cursor;
use Doctrine\SqlFormatter\Token;
use Doctrine\SqlFormatter\Tokenizer;
use Generator;
use PHPUnit\Framework\Attributes\DataProvider;
use PHPUnit\Framework\TestCase;
use ReflectionClass;

use function array_diff;
use function array_diff_key;
use function array_filter;
use function array_intersect;
use function array_merge;
use function array_unique;
use function implode;
use function preg_match;
use function serialize;
use function sort;
use function strtoupper;

final class TokenizerTest extends TestCase
{
    /**
     * Based on https://mariadb.com/kb/en/reserved-words/ list.
     *
     * All these keywords must be quoted.
     */
    private const KEYWORDS_RESERVED_MARIADB = [
        'ACCESSIBLE',
        'ADD',
        'ALL',
        'ALTER',
        'ANALYZE',
        'AND',
        'AS',
        'ASC',
        'ASENSITIVE',
        'BEFORE',
        'BETWEEN',
        'BIGINT',
        'BINARY',
        'BLOB',
        'BOTH',
        'BY',
        'CALL',
        'CASCADE',
        'CASE',
        'CHANGE',
        'CHAR',
        'CHARACTER',
        'CHECK',
        'COLLATE',
        'COLUMN',
        'CONDITION',
        'CONSTRAINT',
        'CONTINUE',
        'CONVERT',
        'CREATE',
        'CROSS',
        'CURRENT_DATE',
        'CURRENT_ROLE',
        'CURRENT_TIME',
        'CURRENT_TIMESTAMP',
        'CURRENT_USER',
        'CURSOR',
        'DATABASE',
        'DATABASES',
        'DAY_HOUR',
        'DAY_MICROSECOND',
        'DAY_MINUTE',
        'DAY_SECOND',
        'DEC',
        'DECIMAL',
        'DECLARE',
        'DEFAULT',
        'DELAYED',
        'DELETE',
        'DELETE_DOMAIN_ID',
        'DESC',
        'DESCRIBE',
        'DETERMINISTIC',
        'DISTINCT',
        'DISTINCTROW',
        'DIV',
        'DOUBLE',
        'DO_DOMAIN_IDS',
        'DROP',
        'DUAL',
        'EACH',
        'ELSE',
        'ELSEIF',
        'ENCLOSED',
        'ESCAPED',
        'EXCEPT',
        'EXISTS',
        'EXIT',
        'EXPLAIN',
        'FALSE',
        'FETCH',
        'FLOAT',
        'FLOAT4',
        'FLOAT8',
        'FOR',
        'FORCE',
        'FOREIGN',
        'FROM',
        'FULLTEXT',
        'GENERAL',
        'GRANT',
        'GROUP',
        'HAVING',
        'HIGH_PRIORITY',
        'HOUR_MICROSECOND',
        'HOUR_MINUTE',
        'HOUR_SECOND',
        'IF',
        'IGNORE',
        'IGNORE_DOMAIN_IDS',
        'IGNORE_SERVER_IDS',
        'IN',
        'INDEX',
        'INFILE',
        'INNER',
        'INOUT',
        'INSENSITIVE',
        'INSERT',
        'INT',
        'INT1',
        'INT2',
        'INT3',
        'INT4',
        'INT8',
        'INTEGER',
        'INTERSECT',
        'INTERVAL',
        'INTO',
        'IS',
        'ITERATE',
        'JOIN',
        'KEY',
        'KEYS',
        'KILL',
        'LEADING',
        'LEAVE',
        'LEFT',
        'LIKE',
        'LIMIT',
        'LINEAR',
        'LINES',
        'LOAD',
        'LOCALTIME',
        'LOCALTIMESTAMP',
        'LOCK',
        'LONG',
        'LONGBLOB',
        'LONGTEXT',
        'LOOP',
        'LOW_PRIORITY',
        'MASTER_HEARTBEAT_PERIOD',
        'MASTER_SSL_VERIFY_SERVER_CERT',
        'MATCH',
        'MAXVALUE',
        'MEDIUMBLOB',
        'MEDIUMINT',
        'MEDIUMTEXT',
        'MIDDLEINT',
        'MINUTE_MICROSECOND',
        'MINUTE_SECOND',
        'MOD',
        'MODIFIES',
        'NATURAL',
        'NOT',
        'NO_WRITE_TO_BINLOG',
        'NULL',
        'NUMERIC',
        'OFFSET',
        'ON',
        'OPTIMIZE',
        'OPTION',
        'OPTIONALLY',
        'OR',
        'ORDER',
        'OUT',
        'OUTER',
        'OUTFILE',
        'OVER',
        'PAGE_CHECKSUM',
        'PARSE_VCOL_EXPR',
        'PARTITION',
        'PRECISION',
        'PRIMARY',
        'PROCEDURE',
        'PURGE',
        'RANGE',
        'READ',
        'READS',
        'READ_WRITE',
        'REAL',
        'RECURSIVE',
        'REFERENCES',
        'REF_SYSTEM_ID',
        'REGEXP',
        'RELEASE',
        'RENAME',
        'REPEAT',
        'REPLACE',
        'REQUIRE',
        'RESIGNAL',
        'RESTRICT',
        'RETURN',
        'RETURNING',
        'REVOKE',
        'RIGHT',
        'RLIKE',
        'ROWS',
        'ROW_NUMBER',
        'SCHEMA',
        'SCHEMAS',
        'SECOND_MICROSECOND',
        'SELECT',
        'SENSITIVE',
        'SEPARATOR',
        'SET',
        'SHOW',
        'SIGNAL',
        'SLOW',
        'SMALLINT',
        'SPATIAL',
        'SPECIFIC',
        'SQL',
        'SQLEXCEPTION',
        'SQLSTATE',
        'SQLWARNING',
        'SQL_BIG_RESULT',
        'SQL_CALC_FOUND_ROWS',
        'SQL_SMALL_RESULT',
        'SSL',
        'STARTING',
        'STATS_AUTO_RECALC',
        'STATS_PERSISTENT',
        'STATS_SAMPLE_PAGES',
        'STRAIGHT_JOIN',
        'TABLE',
        'TERMINATED',
        'THEN',
        'TINYBLOB',
        'TINYINT',
        'TINYTEXT',
        'TO',
        'TRAILING',
        'TRIGGER',
        'TRUE',
        'UNDO',
        'UNION',
        'UNIQUE',
        'UNLOCK',
        'UNSIGNED',
        'UPDATE',
        'USAGE',
        'USE',
        'USING',
        'UTC_DATE',
        'UTC_TIME',
        'UTC_TIMESTAMP',
        'VALUES',
        'VARBINARY',
        'VARCHAR',
        'VARCHARACTER',
        'VARYING',
        'WHEN',
        'WHERE',
        'WHILE',
        'WINDOW',
        'WITH',
        'WRITE',
        'XOR',
        'YEAR_MONTH',
        'ZEROFILL',
    ];

    /**
     * Based on https://learn.microsoft.com/en-us/sql/t-sql/language-elements/reserved-keywords-transact-sql?view=sql-server-ver16 list.
     *
     * All these keywords must be quoted.
     */
    private const KEYWORDS_RESERVED_MSSQL = [
        'ADD',
        'ALL',
        'ALTER',
        'AND',
        'ANY',
        'AS',
        'ASC',
        'AUTHORIZATION',
        'BACKUP',
        'BEGIN',
        'BETWEEN',
        'BREAK',
        'BROWSE',
        'BULK',
        'BY',
        'CASCADE',
        'CASE',
        'CHECK',
        'CHECKPOINT',
        'CLOSE',
        'CLUSTERED',
        'COALESCE',
        'COLLATE',
        'COLUMN',
        'COMMIT',
        'COMPUTE',
        'CONSTRAINT',
        'CONTAINS',
        'CONTAINSTABLE',
        'CONTINUE',
        'CONVERT',
        'CREATE',
        'CROSS',
        'CURRENT',
        'CURRENT_DATE',
        'CURRENT_TIME',
        'CURRENT_TIMESTAMP',
        'CURRENT_USER',
        'CURSOR',
        'DATABASE',
        'DBCC',
        'DEALLOCATE',
        'DECLARE',
        'DEFAULT',
        'DELETE',
        'DENY',
        'DESC',
        'DISK',
        'DISTINCT',
        'DISTRIBUTED',
        'DOUBLE',
        'DROP',
        'DUMP',
        'ELSE',
        'END',
        'ERRLVL',
        'ESCAPE',
        'EXCEPT',
        'EXEC',
        'EXECUTE',
        'EXISTS',
        'EXIT',
        'EXTERNAL',
        'FETCH',
        'FILE',
        'FILLFACTOR',
        'FOR',
        'FOREIGN',
        'FREETEXT',
        'FREETEXTTABLE',
        'FROM',
        'FULL',
        'FUNCTION',
        'GOTO',
        'GRANT',
        'GROUP',
        'HAVING',
        'HOLDLOCK',
        'IDENTITY',
        'IDENTITYCOL',
        'IDENTITY_INSERT',
        'IF',
        'IN',
        'INDEX',
        'INNER',
        'INSERT',
        'INTERSECT',
        'INTO',
        'IS',
        'JOIN',
        'KEY',
        'KILL',
        'LEFT',
        'LIKE',
        'LINENO',
        'LOAD',
        'MERGE',
        'NATIONAL',
        'NOCHECK',
        'NONCLUSTERED',
        'NOT',
        'NULL',
        'NULLIF',
        'OF',
        'OFF',
        'OFFSETS',
        'ON',
        'OPEN',
        'OPENDATASOURCE',
        'OPENQUERY',
        'OPENROWSET',
        'OPENXML',
        'OPTION',
        'OR',
        'ORDER',
        'OUTER',
        'OVER',
        'PERCENT',
        'PIVOT',
        'PLAN',
        'PRECISION',
        'PRIMARY',
        'PRINT',
        'PROC',
        'PROCEDURE',
        'PUBLIC',
        'RAISERROR',
        'READ',
        'READTEXT',
        'RECONFIGURE',
        'REFERENCES',
        'REPLICATION',
        'RESTORE',
        'RESTRICT',
        'RETURN',
        'REVERT',
        'REVOKE',
        'RIGHT',
        'ROLLBACK',
        'ROWCOUNT',
        'ROWGUIDCOL',
        'RULE',
        'SAVE',
        'SCHEMA',
        'SECURITYAUDIT',
        'SELECT',
        'SEMANTICKEYPHRASETABLE',
        'SEMANTICSIMILARITYDETAILSTABLE',
        'SEMANTICSIMILARITYTABLE',
        'SESSION_USER',
        'SET',
        'SETUSER',
        'SHUTDOWN',
        'SOME',
        'STATISTICS',
        'SYSTEM_USER',
        'TABLE',
        'TABLESAMPLE',
        'TEXTSIZE',
        'THEN',
        'TO',
        'TOP',
        'TRAN',
        'TRANSACTION',
        'TRIGGER',
        'TRUNCATE',
        'TRY_CONVERT',
        'TSEQUAL',
        'UNION',
        'UNIQUE',
        'UNPIVOT',
        'UPDATE',
        'UPDATETEXT',
        'USE',
        'USER',
        'VALUES',
        'VARYING',
        'VIEW',
        'WAITFOR',
        'WHEN',
        'WHERE',
        'WHILE',
        'WITH',
        'WITHIN GROUP',
        'WRITETEXT',
    ];

    /**
     * Based on https://dev.mysql.com/doc/refman/8.4/en/keywords.html list.
     *
     * All these keywords must be quoted.
     */
    private const KEYWORDS_RESERVED_MYSQL = [
        'ACCESSIBLE',
        'ADD',
        'ALL',
        'ALTER',
        'ANALYZE',
        'AND',
        'AS',
        'ASC',
        'ASENSITIVE',
        'BEFORE',
        'BETWEEN',
        'BIGINT',
        'BINARY',
        'BLOB',
        'BOTH',
        'BY',
        'CALL',
        'CASCADE',
        'CASE',
        'CHANGE',
        'CHAR',
        'CHARACTER',
        'CHECK',
        'COLLATE',
        'COLUMN',
        'CONDITION',
        'CONSTRAINT',
        'CONTINUE',
        'CONVERT',
        'CREATE',
        'CROSS',
        'CUBE',
        'CUME_DIST',
        'CURRENT_DATE',
        'CURRENT_TIME',
        'CURRENT_TIMESTAMP',
        'CURRENT_USER',
        'CURSOR',
        'DATABASE',
        'DATABASES',
        'DAY_HOUR',
        'DAY_MICROSECOND',
        'DAY_MINUTE',
        'DAY_SECOND',
        'DEC',
        'DECIMAL',
        'DECLARE',
        'DEFAULT',
        'DELAYED',
        'DELETE',
        'DENSE_RANK',
        'DESC',
        'DESCRIBE',
        'DETERMINISTIC',
        'DISTINCT',
        'DISTINCTROW',
        'DIV',
        'DOUBLE',
        'DROP',
        'DUAL',
        'EACH',
        'ELSE',
        'ELSEIF',
        'EMPTY',
        'ENCLOSED',
        'ESCAPED',
        'EXCEPT',
        'EXISTS',
        'EXIT',
        'EXPLAIN',
        'FALSE',
        'FETCH',
        'FIRST_VALUE',
        'FLOAT',
        'FLOAT4',
        'FLOAT8',
        'FOR',
        'FORCE',
        'FOREIGN',
        'FROM',
        'FULLTEXT',
        'FUNCTION',
        'GENERATED',
        'GET',
        'GRANT',
        'GROUP',
        'GROUPING',
        'GROUPS',
        'HAVING',
        'HIGH_PRIORITY',
        'HOUR_MICROSECOND',
        'HOUR_MINUTE',
        'HOUR_SECOND',
        'IF',
        'IGNORE',
        'IN',
        'INDEX',
        'INFILE',
        'INNER',
        'INOUT',
        'INSENSITIVE',
        'INSERT',
        'INT',
        'INT1',
        'INT2',
        'INT3',
        'INT4',
        'INT8',
        'INTEGER',
        'INTERSECT',
        'INTERVAL',
        'INTO',
        'IO_AFTER_GTIDS',
        'IO_BEFORE_GTIDS',
        'IS',
        'ITERATE',
        'JOIN',
        'JSON_TABLE',
        'KEY',
        'KEYS',
        'KILL',
        'LAG',
        'LAST_VALUE',
        'LATERAL',
        'LEAD',
        'LEADING',
        'LEAVE',
        'LEFT',
        'LIKE',
        'LIMIT',
        'LINEAR',
        'LINES',
        'LOAD',
        'LOCALTIME',
        'LOCALTIMESTAMP',
        'LOCK',
        'LONG',
        'LONGBLOB',
        'LONGTEXT',
        'LOOP',
        'LOW_PRIORITY',
        'MANUAL',
        'MATCH',
        'MAXVALUE',
        'MEDIUMBLOB',
        'MEDIUMINT',
        'MEDIUMTEXT',
        'MIDDLEINT',
        'MINUTE_MICROSECOND',
        'MINUTE_SECOND',
        'MOD',
        'MODIFIES',
        'NATURAL',
        'NOT',
        'NO_WRITE_TO_BINLOG',
        'NTH_VALUE',
        'NTILE',
        'NULL',
        'NUMERIC',
        'OF',
        'ON',
        'OPTIMIZE',
        'OPTIMIZER_COSTS',
        'OPTION',
        'OPTIONALLY',
        'OR',
        'ORDER',
        'OUT',
        'OUTER',
        'OUTFILE',
        'OVER',
        'PARALLEL',
        'PARTITION',
        'PERCENT_RANK',
        'PRECISION',
        'PRIMARY',
        'PROCEDURE',
        'PURGE',
        'QUALIFY',
        'RANGE',
        'RANK',
        'READ',
        'READS',
        'READ_WRITE',
        'REAL',
        'RECURSIVE',
        'REFERENCES',
        'REGEXP',
        'RELEASE',
        'RENAME',
        'REPEAT',
        'REPLACE',
        'REQUIRE',
        'RESIGNAL',
        'RESTRICT',
        'RETURN',
        'REVOKE',
        'RIGHT',
        'RLIKE',
        'ROW',
        'ROWS',
        'ROW_NUMBER',
        'SCHEMA',
        'SCHEMAS',
        'SECOND_MICROSECOND',
        'SELECT',
        'SENSITIVE',
        'SEPARATOR',
        'SET',
        'SHOW',
        'SIGNAL',
        'SMALLINT',
        'SPATIAL',
        'SPECIFIC',
        'SQL',
        'SQLEXCEPTION',
        'SQLSTATE',
        'SQLWARNING',
        'SQL_BIG_RESULT',
        'SQL_CALC_FOUND_ROWS',
        'SQL_SMALL_RESULT',
        'SSL',
        'STARTING',
        'STORED',
        'STRAIGHT_JOIN',
        'SYSTEM',
        'TABLE',
        'TABLESAMPLE',
        'TERMINATED',
        'THEN',
        'TINYBLOB',
        'TINYINT',
        'TINYTEXT',
        'TO',
        'TRAILING',
        'TRIGGER',
        'TRUE',
        'UNDO',
        'UNION',
        'UNIQUE',
        'UNLOCK',
        'UNSIGNED',
        'UPDATE',
        'USAGE',
        'USE',
        'USING',
        'UTC_DATE',
        'UTC_TIME',
        'UTC_TIMESTAMP',
        'VALUES',
        'VARBINARY',
        'VARCHAR',
        'VARCHARACTER',
        'VARYING',
        'VIRTUAL',
        'WHEN',
        'WHERE',
        'WHILE',
        'WINDOW',
        'WITH',
        'WRITE',
        'XOR',
        'YEAR_MONTH',
        'ZEROFILL',
    ];

    /**
     * Based on https://docs.oracle.com/en/database/oracle/oracle-database/23/sqlrf/Oracle-SQL-Reserved-Words.html list.
     *
     * All these keywords must be quoted.
     */
    private const KEYWORDS_RESERVED_ORACLE = [
        'ACCESS',
        'ADD',
        'ALL',
        'ALTER',
        'AND',
        'ANY',
        'AS',
        'ASC',
        'AUDIT',
        'BETWEEN',
        'BY',
        'CHAR',
        'CHECK',
        'CLUSTER',
        'COLUMN',
        'COMMENT',
        'COMPRESS',
        'CONNECT',
        'CREATE',
        'CURRENT',
        'DATE',
        'DECIMAL',
        'DEFAULT',
        'DELETE',
        'DESC',
        'DISTINCT',
        'DROP',
        'ELSE',
        'EXCLUSIVE',
        'EXISTS',
        'FILE',
        'FLOAT',
        'FOR',
        'FROM',
        'GRANT',
        'GROUP',
        'HAVING',
        'IDENTIFIED',
        'IMMEDIATE',
        'IN',
        'INCREMENT',
        'INDEX',
        'INITIAL',
        'INSERT',
        'INTEGER',
        'INTERSECT',
        'INTO',
        'IS',
        'LEVEL',
        'LIKE',
        'LOCK',
        'LONG',
        'MAXEXTENTS',
        'MINUS',
        'MLSLABEL',
        'MODE',
        'MODIFY',
        'NOAUDIT',
        'NOCOMPRESS',
        'NOT',
        'NOWAIT',
        'NULL',
        'NUMBER',
        'OF',
        'OFFLINE',
        'ON',
        'ONLINE',
        'OPTION',
        'OR',
        'ORDER',
        'PCTFREE',
        'PRIOR',
        'PUBLIC',
        'RAW',
        'RENAME',
        'RESOURCE',
        'REVOKE',
        'ROW',
        'ROWID',
        'ROWNUM',
        'ROWS',
        'SELECT',
        'SESSION',
        'SET',
        'SHARE',
        'SIZE',
        'SMALLINT',
        'START',
        'SUCCESSFUL',
        'SYNONYM',
        'SYSDATE',
        'TABLE',
        'THEN',
        'TO',
        'TRIGGER',
        'UID',
        'UNION',
        'UNIQUE',
        'UPDATE',
        'USER',
        'VALIDATE',
        'VALUES',
        'VARCHAR',
        'VARCHAR2',
        'VIEW',
        'WHENEVER',
        'WHERE',
        'WITH',
    ];

    /**
     * Based on https://www.postgresql.org/docs/16/sql-keywords-appendix.html list.
     *
     * All these keywords must be quoted.
     */
    private const KEYWORDS_RESERVED_POSTGRESQL = [
        'ALL',
        'ANALYSE',
        'ANALYZE',
        'AND',
        'ANY',
        'ARRAY',
        'AS',
        'ASC',
        'ASYMMETRIC',
        'AUTHORIZATION',
        'BINARY',
        'BOTH',
        'CASE',
        'CAST',
        'CHECK',
        'COLLATE',
        'COLLATION',
        'COLUMN',
        'CONCURRENTLY',
        'CONSTRAINT',
        'CREATE',
        'CROSS',
        'CURRENT_CATALOG',
        'CURRENT_DATE',
        'CURRENT_ROLE',
        'CURRENT_SCHEMA',
        'CURRENT_TIME',
        'CURRENT_TIMESTAMP',
        'CURRENT_USER',
        'DEFAULT',
        'DEFERRABLE',
        'DESC',
        'DISTINCT',
        'DO',
        'ELSE',
        'END',
        'EXCEPT',
        'FALSE',
        'FETCH',
        'FOR',
        'FOREIGN',
        'FREEZE',
        'FROM',
        'FULL',
        'GRANT',
        'GROUP',
        'HAVING',
        'ILIKE',
        'IN',
        'INITIALLY',
        'INNER',
        'INTERSECT',
        'INTO',
        'IS',
        'ISNULL',
        'JOIN',
        'LATERAL',
        'LEADING',
        'LEFT',
        'LIKE',
        'LIMIT',
        'LOCALTIME',
        'LOCALTIMESTAMP',
        'NATURAL',
        'NOT',
        'NOTNULL',
        'NULL',
        'OFFSET',
        'ON',
        'ONLY',
        'OR',
        'ORDER',
        'OUTER',
        'OVERLAPS',
        'PLACING',
        'PRIMARY',
        'REFERENCES',
        'RETURNING',
        'RIGHT',
        'SELECT',
        'SESSION_USER',
        'SIMILAR',
        'SOME',
        'SYMMETRIC',
        'SYSTEM_USER',
        'TABLE',
        'TABLESAMPLE',
        'THEN',
        'TO',
        'TRAILING',
        'TRUE',
        'UNION',
        'UNIQUE',
        'USER',
        'USING',
        'VARIADIC',
        'VERBOSE',
        'WHEN',
        'WHERE',
        'WINDOW',
        'WITH',
    ];

    /**
     * Based on the SQL:2023 column of the https://www.postgresql.org/docs/16/sql-keywords-appendix.html list.
     *
     * All these keywords must be quoted.
     */
    private const KEYWORDS_RESERVED_SQL2023 = [
        'ABS',
        'ABSENT',
        'ACOS',
        'ALL',
        'ALLOCATE',
        'ALTER',
        'AND',
        'ANY',
        'ANY_VALUE',
        'ARE',
        'ARRAY',
        'ARRAY_AGG',
        'ARRAY_MAX_CARDINALITY',
        'AS',
        'ASENSITIVE',
        'ASIN',
        'ASYMMETRIC',
        'AT',
        'ATAN',
        'ATOMIC',
        'AUTHORIZATION',
        'AVG',
        'BEGIN',
        'BEGIN_FRAME',
        'BEGIN_PARTITION',
        'BETWEEN',
        'BIGINT',
        'BINARY',
        'BLOB',
        'BOOLEAN',
        'BOTH',
        'BTRIM',
        'BY',
        'CALL',
        'CALLED',
        'CARDINALITY',
        'CASCADED',
        'CASE',
        'CAST',
        'CEIL',
        'CEILING',
        'CHAR',
        'CHARACTER',
        'CHARACTER_LENGTH',
        'CHAR_LENGTH',
        'CHECK',
        'CLASSIFIER',
        'CLOB',
        'CLOSE',
        'COALESCE',
        'COLLATE',
        'COLLECT',
        'COLUMN',
        'COMMIT',
        'CONDITION',
        'CONNECT',
        'CONSTRAINT',
        'CONTAINS',
        'CONVERT',
        'COPY',
        'CORR',
        'CORRESPONDING',
        'COS',
        'COSH',
        'COUNT',
        'COVAR_POP',
        'COVAR_SAMP',
        'CREATE',
        'CROSS',
        'CUBE',
        'CUME_DIST',
        'CURRENT',
        'CURRENT_CATALOG',
        'CURRENT_DATE',
        'CURRENT_DEFAULT_TRANSFORM_GROUP',
        'CURRENT_PATH',
        'CURRENT_ROLE',
        'CURRENT_ROW',
        'CURRENT_SCHEMA',
        'CURRENT_TIME',
        'CURRENT_TIMESTAMP',
        'CURRENT_TRANSFORM_GROUP_FOR_TYPE',
        'CURRENT_USER',
        'CURSOR',
        'CYCLE',
        'DATE',
        'DAY',
        'DEALLOCATE',
        'DEC',
        'DECFLOAT',
        'DECIMAL',
        'DECLARE',
        'DEFAULT',
        'DEFINE',
        'DELETE',
        'DENSE_RANK',
        'DEREF',
        'DESCRIBE',
        'DETERMINISTIC',
        'DISCONNECT',
        'DISTINCT',
        'DOUBLE',
        'DROP',
        'DYNAMIC',
        'EACH',
        'ELEMENT',
        'ELSE',
        'EMPTY',
        'END',
        'END-EXEC',
        'END_FRAME',
        'END_PARTITION',
        'EQUALS',
        'ESCAPE',
        'EVERY',
        'EXCEPT',
        'EXEC',
        'EXECUTE',
        'EXISTS',
        'EXP',
        'EXTERNAL',
        'EXTRACT',
        'FALSE',
        'FETCH',
        'FILTER',
        'FIRST_VALUE',
        'FLOAT',
        'FLOOR',
        'FOR',
        'FOREIGN',
        'FRAME_ROW',
        'FREE',
        'FROM',
        'FULL',
        'FUNCTION',
        'FUSION',
        'GET',
        'GLOBAL',
        'GRANT',
        'GREATEST',
        'GROUP',
        'GROUPING',
        'GROUPS',
        'HAVING',
        'HOLD',
        'HOUR',
        'IDENTITY',
        'IN',
        'INDICATOR',
        'INITIAL',
        'INNER',
        'INOUT',
        'INSENSITIVE',
        'INSERT',
        'INT',
        'INTEGER',
        'INTERSECT',
        'INTERSECTION',
        'INTERVAL',
        'INTO',
        'IS',
        'JOIN',
        'JSON',
        'JSON_ARRAY',
        'JSON_ARRAYAGG',
        'JSON_EXISTS',
        'JSON_OBJECT',
        'JSON_OBJECTAGG',
        'JSON_QUERY',
        'JSON_SCALAR',
        'JSON_SERIALIZE',
        'JSON_TABLE',
        'JSON_TABLE_PRIMITIVE',
        'JSON_VALUE',
        'LAG',
        'LANGUAGE',
        'LARGE',
        'LAST_VALUE',
        'LATERAL',
        'LEAD',
        'LEADING',
        'LEAST',
        'LEFT',
        'LIKE',
        'LIKE_REGEX',
        'LISTAGG',
        'LN',
        'LOCAL',
        'LOCALTIME',
        'LOCALTIMESTAMP',
        'LOG',
        'LOG10',
        'LOWER',
        'LPAD',
        'LTRIM',
        'MATCH',
        'MATCHES',
        'MATCH_NUMBER',
        'MATCH_RECOGNIZE',
        'MAX',
        'MEMBER',
        'MERGE',
        'METHOD',
        'MIN',
        'MINUTE',
        'MOD',
        'MODIFIES',
        'MODULE',
        'MONTH',
        'MULTISET',
        'NATIONAL',
        'NATURAL',
        'NCHAR',
        'NCLOB',
        'NEW',
        'NO',
        'NONE',
        'NORMALIZE',
        'NOT',
        'NTH_VALUE',
        'NTILE',
        'NULL',
        'NULLIF',
        'NUMERIC',
        'OCCURRENCES_REGEX',
        'OCTET_LENGTH',
        'OF',
        'OFFSET',
        'OLD',
        'OMIT',
        'ON',
        'ONE',
        'ONLY',
        'OPEN',
        'OR',
        'ORDER',
        'OUT',
        'OUTER',
        'OVER',
        'OVERLAPS',
        'OVERLAY',
        'PARAMETER',
        'PARTITION',
        'PATTERN',
        'PER',
        'PERCENT',
        'PERCENTILE_CONT',
        'PERCENTILE_DISC',
        'PERCENT_RANK',
        'PERIOD',
        'PORTION',
        'POSITION',
        'POSITION_REGEX',
        'POWER',
        'PRECEDES',
        'PRECISION',
        'PREPARE',
        'PRIMARY',
        'PROCEDURE',
        'PTF',
        'RANGE',
        'RANK',
        'READS',
        'REAL',
        'RECURSIVE',
        'REF',
        'REFERENCES',
        'REFERENCING',
        'REGR_AVGX',
        'REGR_AVGY',
        'REGR_COUNT',
        'REGR_INTERCEPT',
        'REGR_R2',
        'REGR_SLOPE',
        'REGR_SXX',
        'REGR_SXY',
        'REGR_SYY',
        'RELEASE',
        'RESULT',
        'RETURN',
        'RETURNS',
        'REVOKE',
        'RIGHT',
        'ROLLBACK',
        'ROLLUP',
        'ROW',
        'ROWS',
        'ROW_NUMBER',
        'RPAD',
        'RUNNING',
        'SAVEPOINT',
        'SCOPE',
        'SCROLL',
        'SEARCH',
        'SECOND',
        'SEEK',
        'SELECT',
        'SENSITIVE',
        'SESSION_USER',
        'SET',
        'SHOW',
        'SIMILAR',
        'SIN',
        'SINH',
        'SKIP',
        'SMALLINT',
        'SOME',
        'SPECIFIC',
        'SPECIFICTYPE',
        'SQL',
        'SQLEXCEPTION',
        'SQLSTATE',
        'SQLWARNING',
        'SQRT',
        'START',
        'STATIC',
        'STDDEV_POP',
        'STDDEV_SAMP',
        'SUBMULTISET',
        'SUBSET',
        'SUBSTRING',
        'SUBSTRING_REGEX',
        'SUCCEEDS',
        'SUM',
        'SYMMETRIC',
        'SYSTEM',
        'SYSTEM_TIME',
        'SYSTEM_USER',
        'TABLE',
        'TABLESAMPLE',
        'TAN',
        'TANH',
        'THEN',
        'TIME',
        'TIMESTAMP',
        'TIMEZONE_HOUR',
        'TIMEZONE_MINUTE',
        'TO',
        'TRAILING',
        'TRANSLATE',
        'TRANSLATE_REGEX',
        'TRANSLATION',
        'TREAT',
        'TRIGGER',
        'TRIM',
        'TRIM_ARRAY',
        'TRUE',
        'TRUNCATE',
        'UESCAPE',
        'UNION',
        'UNIQUE',
        'UNKNOWN',
        'UNNEST',
        'UPDATE',
        'UPPER',
        'USER',
        'USING',
        'VALUE',
        'VALUES',
        'VALUE_OF',
        'VARBINARY',
        'VARCHAR',
        'VARYING',
        'VAR_POP',
        'VAR_SAMP',
        'VERSIONING',
        'WHEN',
        'WHENEVER',
        'WHERE',
        'WIDTH_BUCKET',
        'WINDOW',
        'WITH',
        'WITHIN',
        'WITHOUT',
        'YEAR',
    ];

    /**
     * Based on https://www.sqlite.org/lang_keywords.html list.
     *
     * All these keywords must be quoted.
     */
    private const KEYWORDS_RESERVED_SQLITE = [
        'ABORT',
        'ACTION',
        'ADD',
        'AFTER',
        'ALL',
        'ALTER',
        'ALWAYS',
        'ANALYZE',
        'AND',
        'AS',
        'ASC',
        'ATTACH',
        'AUTOINCREMENT',
        'BEFORE',
        'BEGIN',
        'BETWEEN',
        'BY',
        'CASCADE',
        'CASE',
        'CAST',
        'CHECK',
        'COLLATE',
        'COLUMN',
        'COMMIT',
        'CONFLICT',
        'CONSTRAINT',
        'CREATE',
        'CROSS',
        'CURRENT',
        'CURRENT_DATE',
        'CURRENT_TIME',
        'CURRENT_TIMESTAMP',
        'DATABASE',
        'DEFAULT',
        'DEFERRABLE',
        'DEFERRED',
        'DELETE',
        'DESC',
        'DETACH',
        'DISTINCT',
        'DO',
        'DROP',
        'EACH',
        'ELSE',
        'END',
        'ESCAPE',
        'EXCEPT',
        'EXCLUDE',
        'EXCLUSIVE',
        'EXISTS',
        'EXPLAIN',
        'FAIL',
        'FILTER',
        'FIRST',
        'FOLLOWING',
        'FOR',
        'FOREIGN',
        'FROM',
        'FULL',
        'GENERATED',
        'GLOB',
        'GROUP',
        'GROUPS',
        'HAVING',
        'IF',
        'IGNORE',
        'IMMEDIATE',
        'IN',
        'INDEX',
        'INDEXED',
        'INITIALLY',
        'INNER',
        'INSERT',
        'INSTEAD',
        'INTERSECT',
        'INTO',
        'IS',
        'ISNULL',
        'JOIN',
        'KEY',
        'LAST',
        'LEFT',
        'LIKE',
        'LIMIT',
        'MATCH',
        'MATERIALIZED',
        'NATURAL',
        'NO',
        'NOT',
        'NOTHING',
        'NOTNULL',
        'NULL',
        'NULLS',
        'OF',
        'OFFSET',
        'ON',
        'OR',
        'ORDER',
        'OTHERS',
        'OUTER',
        'OVER',
        'PARTITION',
        'PLAN',
        'PRAGMA',
        'PRECEDING',
        'PRIMARY',
        'QUERY',
        'RAISE',
        'RANGE',
        'RECURSIVE',
        'REFERENCES',
        'REGEXP',
        'REINDEX',
        'RELEASE',
        'RENAME',
        'REPLACE',
        'RESTRICT',
        'RETURNING',
        'RIGHT',
        'ROLLBACK',
        'ROW',
        'ROWS',
        'SAVEPOINT',
        'SELECT',
        'SET',
        'TABLE',
        'TEMP',
        'TEMPORARY',
        'THEN',
        'TIES',
        'TO',
        'TRANSACTION',
        'TRIGGER',
        'UNBOUNDED',
        'UNION',
        'UNIQUE',
        'UPDATE',
        'USING',
        'VACUUM',
        'VALUES',
        'VIEW',
        'VIRTUAL',
        'WHEN',
        'WHERE',
        'WINDOW',
        'WITH',
        'WITHOUT',
    ];

    /**
     * Reads, via reflection, the default value declared for one of the
     * Tokenizer's keyword-list properties.
     *
     * @param 'reserved'|'reservedToplevel'|'reservedNewline'|'functions' $propertyName
     *
     * @return list<string>
     */
    private function getTokenizerList(string $propertyName): array
    {
        $property = (new ReflectionClass(Tokenizer::class))->getProperty($propertyName);

        /** @var list<string> $defaultValue */
        $defaultValue = $property->getDefaultValue();

        return $defaultValue;
    }

    /**
     * Checks that every keyword list is kept in ascending sort() order.
     *
     * Covers the four tokenizer keyword lists (read through reflection) and the
     * per-dialect reserved keyword constants declared in this class. Each list
     * is labelled so that a failing assertion names the list that is out of
     * order instead of only dumping two large arrays.
     */
    public function testInternalKeywordListsAreSortedForEasierMaintenance(): void
    {
        $lists = [
            'Tokenizer reserved' => $this->getTokenizerList('reserved'),
            'Tokenizer reservedToplevel' => $this->getTokenizerList('reservedToplevel'),
            'Tokenizer reservedNewline' => $this->getTokenizerList('reservedNewline'),
            'Tokenizer functions' => $this->getTokenizerList('functions'),
            'KEYWORDS_RESERVED_MARIADB' => self::KEYWORDS_RESERVED_MARIADB,
            'KEYWORDS_RESERVED_MSSQL' => self::KEYWORDS_RESERVED_MSSQL,
            'KEYWORDS_RESERVED_MYSQL' => self::KEYWORDS_RESERVED_MYSQL,
            'KEYWORDS_RESERVED_ORACLE' => self::KEYWORDS_RESERVED_ORACLE,
            'KEYWORDS_RESERVED_POSTGRESQL' => self::KEYWORDS_RESERVED_POSTGRESQL,
            'KEYWORDS_RESERVED_SQL2023' => self::KEYWORDS_RESERVED_SQL2023,
            'KEYWORDS_RESERVED_SQLITE' => self::KEYWORDS_RESERVED_SQLITE,
        ];

        foreach ($lists as $label => $list) {
            // sort() works in place, so sort a copy and compare it with the original order.
            $listSorted = $list;
            sort($listSorted);

            self::assertSame($listSorted, $list, $label . ' list is not sorted');
        }
    }

    /**
     * Checks that each tokenizer keyword (reserved words and functions) is
     * already uppercase and consists solely of word characters (`\w`).
     */
    public function testKeywordsAreSingleUpperWord(): void
    {
        $reserved  = $this->getTokenizerList('reserved');
        $functions = $this->getTokenizerList('functions');

        // Keep only the keywords that break the rule; the result must be empty.
        $offending = array_filter(
            array_unique(array_merge($reserved, $functions)),
            static fn ($keyword) => ! (
                $keyword === strtoupper($keyword)
                && preg_match('~^\w+$~', $keyword) === 1
            ),
        );

        self::assertSame([], $offending);
    }

    /**
     * Checks that no keyword occurs more than once across the tokenizer's
     * reserved and function lists combined.
     */
    public function testKeywordsAreDisjunctive(): void
    {
        $reserved    = $this->getTokenizerList('reserved');
        $functions   = $this->getTokenizerList('functions');
        $allKeywords = array_merge($reserved, $functions);

        // array_unique() keeps the first occurrence of each value under its
        // original key, so the keys left over are those of the repeated entries.
        $firstOccurrences = array_unique($allKeywords);
        $duplicates       = array_diff_key($allKeywords, $firstOccurrences);

        self::assertSame([], $duplicates);
    }

    /**
     * Checks that every keyword present in all of the dialect lists declared in
     * this class is also part of the tokenizer's reserved list.
     */
    public function testKeywordsReservedContainAllIntersectedReservedKeywords(): void
    {
        // Keywords that appear in every one of the dialect lists.
        $reservedEverywhere = array_unique(array_intersect(
            self::KEYWORDS_RESERVED_MARIADB,
            self::KEYWORDS_RESERVED_MSSQL,
            self::KEYWORDS_RESERVED_MYSQL,
            self::KEYWORDS_RESERVED_ORACLE,
            self::KEYWORDS_RESERVED_POSTGRESQL,
            self::KEYWORDS_RESERVED_SQL2023,
            self::KEYWORDS_RESERVED_SQLITE,
        ));

        $missingFromTokenizer = array_diff(
            $reservedEverywhere,
            $this->getTokenizerList('reserved'),
        );

        self::assertSame([], $missingFromTokenizer);
    }

    /**
     * Collects every token yielded by a sub-cursor of `$cursor` and asserts
     * that the collected list equals `$expectedTokens`.
     *
     * @param list<Token> $expectedTokens
     */
    public static function assertEqualsTokens(array $expectedTokens, Cursor $cursor): void
    {
        $iterator     = $cursor->subCursor();
        $actualTokens = [];

        while ($current = $iterator->next()) {
            $actualTokens[] = $current;
        }

        // Only fall back to self::assertEquals() when the serialized forms differ:
        // comparing serialized strings first is an optimization for large inputs.
        if (serialize($actualTokens) !== serialize($expectedTokens)) {
            self::assertEquals($expectedTokens, $actualTokens);

            return;
        }

        self::assertTrue(true); // @phpstan-ignore staticMethod.alreadyNarrowedType
    }

    /**
     * Tokenizes $sql with a fresh Tokenizer and compares the result against $expectedTokens.
     *
     * @param list<Token> $expectedTokens
     */
    #[DataProvider('tokenizeData')]
    public function testTokenize(array $expectedTokens, string $sql): void
    {
        $tokenizer = new Tokenizer();
        $cursor    = $tokenizer->tokenize($sql);

        self::assertEqualsTokens($expectedTokens, $cursor);
    }

    /**
     * Provides named datasets for testTokenize(): the expected tokens followed by the SQL input.
     *
     * @return Generator<mixed[]>
     */
    public static function tokenizeData(): Generator
    {
        yield from [
            'empty' => [
                [],
                '',
            ],
            'basic' => [
                [
                    new Token(Token::TOKEN_TYPE_RESERVED_TOPLEVEL, 'select'),
                    new Token(Token::TOKEN_TYPE_WHITESPACE, ' '),
                    new Token(Token::TOKEN_TYPE_NUMBER, '1'),
                ],
                'select 1',
            ],
            'there are no regressions' => [
                [
                    new Token(Token::TOKEN_TYPE_BOUNDARY, '*'),
                    new Token(Token::TOKEN_TYPE_BOUNDARY, '/'),
                ],
                '*/',
            ],
            'unclosed quoted string' => [
                [
                    new Token(Token::TOKEN_TYPE_QUOTE, '\'foo...'),
                ],
                '\'foo...',
            ],
            'unclosed block comment' => [
                [
                    new Token(Token::TOKEN_TYPE_BLOCK_COMMENT, '/* foo...'),
                ],
                '/* foo...',
            ],
            'PostgreSQL operator' => [
                [
                    new Token(Token::TOKEN_TYPE_RESERVED_TOPLEVEL, 'select'),
                    new Token(Token::TOKEN_TYPE_WHITESPACE, ' '),
                    new Token(Token::TOKEN_TYPE_WORD, 'json'),
                    new Token(Token::TOKEN_TYPE_WHITESPACE, ' '),
                    new Token(Token::TOKEN_TYPE_BOUNDARY, '#'),
                    new Token(Token::TOKEN_TYPE_BOUNDARY, '>'),
                    new Token(Token::TOKEN_TYPE_WHITESPACE, ' '),
                    new Token(Token::TOKEN_TYPE_RESERVED, 'null'),
                ],
                'select json #> null',
            ],
        ];
    }

    /**
     * Tokenizes a statement that compares two concat() calls, each taking 20,000 cast() arguments,
     * and checks the complete resulting token sequence.
     */
    public function testTokenizeLongConcat(): void
    {
        $count = 20_000;

        $castExpressions = [];
        for ($n = 0; $n < $count; $n++) {
            $castExpressions[] = 'cast(\'foo' . $n . '\' as blob)';
        }

        $concatSql = 'concat(' . implode(', ', $castExpressions) . ')';
        $sql       = 'select iif(' . $concatSql . ' = ' . $concatSql . ', 10, 20) x';

        // Builds the expected tokens of one concat(...) operand. It is invoked once per operand so
        // that each operand gets its own Token instances, exactly like a hand-written list would.
        $concatTokens = static function () use ($count): array {
            $tokens = [
                new Token(Token::TOKEN_TYPE_RESERVED, 'concat'),
                new Token(Token::TOKEN_TYPE_BOUNDARY, '('),
            ];

            for ($n = 0; $n < $count; $n++) {
                if ($n > 0) {
                    $tokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, ',');
                    $tokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' ');
                }

                $tokens[] = new Token(Token::TOKEN_TYPE_RESERVED, 'cast');
                $tokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, '(');
                $tokens[] = new Token(Token::TOKEN_TYPE_QUOTE, '\'foo' . $n . '\'');
                $tokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' ');
                $tokens[] = new Token(Token::TOKEN_TYPE_RESERVED, 'as');
                $tokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' ');
                $tokens[] = new Token(Token::TOKEN_TYPE_RESERVED, 'blob');
                $tokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, ')');
            }

            $tokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, ')');

            return $tokens;
        };

        $expectedTokens = [
            // select iif(
            new Token(Token::TOKEN_TYPE_RESERVED_TOPLEVEL, 'select'),
            new Token(Token::TOKEN_TYPE_WHITESPACE, ' '),
            new Token(Token::TOKEN_TYPE_WORD, 'iif'),
            new Token(Token::TOKEN_TYPE_BOUNDARY, '('),
            // left operand
            ...$concatTokens(),
            new Token(Token::TOKEN_TYPE_WHITESPACE, ' '),
            new Token(Token::TOKEN_TYPE_BOUNDARY, '='),
            new Token(Token::TOKEN_TYPE_WHITESPACE, ' '),
            // right operand
            ...$concatTokens(),
            // , 10, 20) x
            new Token(Token::TOKEN_TYPE_BOUNDARY, ','),
            new Token(Token::TOKEN_TYPE_WHITESPACE, ' '),
            new Token(Token::TOKEN_TYPE_NUMBER, '10'),
            new Token(Token::TOKEN_TYPE_BOUNDARY, ','),
            new Token(Token::TOKEN_TYPE_WHITESPACE, ' '),
            new Token(Token::TOKEN_TYPE_NUMBER, '20'),
            new Token(Token::TOKEN_TYPE_BOUNDARY, ')'),
            new Token(Token::TOKEN_TYPE_WHITESPACE, ' '),
            new Token(Token::TOKEN_TYPE_WORD, 'x'),
        ];

        $this->testTokenize($expectedTokens, $sql);
    }
}
